home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Disc to the Future 2
/
Disc to the Future Part II Programmer's Reference (Wayzata Technology)(6013)(1992).bin
/
UNIX
/
C
/
INDENT
/
LEXI.C
< prev
next >
Wrap
C/C++ Source or Header
|
1989-09-02
|
16KB
|
621 lines
/*
* Copyright (c) 1985 Sun Microsystems, Inc.
* Copyright (c) 1980 The Regents of the University of California.
* Copyright (c) 1976 Board of Trustees of the University of Illinois.
* All rights reserved.
*
* Redistribution and use in source and binary forms are permitted
* provided that the above copyright notice and this paragraph are
* duplicated in all such forms and that any documentation,
* advertising materials, and other materials related to such
* distribution and use acknowledge that the software was developed
* by the University of California, Berkeley, the University of Illinois,
* Urbana, and Sun Microsystems, Inc. The name of either University
* or Sun Microsystems may not be used to endorse or promote products
* derived from this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
#ifndef lint
/* SCCS identification string for this file; omitted when `lint` is defined. */
static char sccsid[] = "@(#)lexi.c 5.11 (Berkeley) 9/15/88";
#endif /* not lint */
/*
* Here we have the token scanner for indent. It scans off one token and puts
* it in the global variable "token". It returns a code, indicating the type
* of token scanned.
*/
#include "indent_globs.h"
#include <ctype.h>
/*
 * Character classes stored in chartype[]; a character that belongs to
 * neither class has the value 0 there.
 */
/* Class of characters that lexi() scans as part of an identifier or number. */
#define alphanum 1
/*
 * Presumably the class of operator characters (the table holds the value 3
 * for them); the name itself is not referenced in this file -- confirm
 * before relying on it.
 */
#define opchar 3
/*
 * Category attached to each reserved word in the keyword table.  lexi()
 * switches on this value to decide which token code to return for a
 * keyword; categories it has no case for are returned as plain identifiers.
 */
enum rwcodes {
    rw_none = 0,		/* no category; also the placeholder of unused slots */
    rw_break = 1,		/* break, return, goto */
    rw_switch = 2,		/* switch */
    rw_case = 3,		/* case, default */
    rw_struct_like = 4,		/* struct, enum, union */
    rw_decl = 5,		/* declaration keywords (int, static, ...) */
    rw_sp_paren = 6,		/* if, while, for */
    rw_sp_nparen = 7,		/* do, else */
    rw_sizeof = 8		/* sizeof */
};
/*
 * One entry of a keyword table: the spelling of a reserved word together
 * with the category lexi() uses to classify it.
 */
struct templ {
/* NUL-terminated keyword text; addkey() stores a null pointer here in the
 * slot following the last user-defined entry */
char *rwd;
/* category of the keyword */
enum rwcodes rwcode;
};
/*
 * Keywords registered at run time through addkey().  The table is
 * allocated on the first registration and stays 0 until then.
 */
struct templ *user_specials = 0;

/* Number of entries currently allocated for user_specials. */
unsigned int user_specials_max;

/* Number of entries of user_specials that hold a keyword. */
unsigned int user_specials_idx;
/*
 * Character class of every 7-bit ASCII code, indexed by character value.
 * It is used to decide what type (alphanumeric, operator) each character is:
 *   0  neither class
 *   1  alphanum: letters, digits, '_' and '$'
 *   3  operator characters: ! % & * + - / < = > ? ^ | ~
 * The table has only 128 entries, so the index must be in 0..127.
 */
char chartype[128] =
{
0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x07  control characters */
0, 0, 0, 0, 0, 0, 0, 0, /* 0x08-0x0f  control characters */
0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x17  control characters */
0, 0, 0, 0, 0, 0, 0, 0, /* 0x18-0x1f  control characters */
0, 3, 0, 0, 1, 3, 3, 0, /* 0x20-0x27  space ! " # $ % & ' */
0, 0, 3, 3, 0, 3, 0, 3, /* 0x28-0x2f  ( ) star + , - . slash */
1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x37  0 1 2 3 4 5 6 7 */
1, 1, 0, 0, 3, 3, 3, 3, /* 0x38-0x3f  8 9 : ; < = > ? */
0, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x47  @ A B C D E F G */
1, 1, 1, 1, 1, 1, 1, 1, /* 0x48-0x4f  H I J K L M N O */
1, 1, 1, 1, 1, 1, 1, 1, /* 0x50-0x57  P Q R S T U V W */
1, 1, 1, 0, 0, 0, 3, 1, /* 0x58-0x5f  X Y Z [ backslash ] ^ _ */
0, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x67  ` a b c d e f g */
1, 1, 1, 1, 1, 1, 1, 1, /* 0x68-0x6f  h i j k l m n o */
1, 1, 1, 1, 1, 1, 1, 1, /* 0x70-0x77  p q r s t u v w */
1, 1, 1, 0, 3, 0, 3, 0 /* 0x78-0x7f  x y z { | } ~ DEL */
};
/* The generated perfect hash functions that recognize the reserved words.
C code produced by gperf version 1.8.1 (GNU C++ version)
Command-line: gperf -c -p -t -T -g -j1 -o -K rwd -N is_reserved indent.gperf */
/*
 * Bounds checked by is_reserved() before it consults its word list: a
 * candidate whose length or hash value falls outside them is rejected.
 */
/* shortest candidate length accepted */
#define MIN_WORD_LENGTH 2
/* longest candidate length accepted */
#define MAX_WORD_LENGTH 8
/* smallest hash value accepted as a word-list index */
#define MIN_HASH_VALUE 4
/* largest hash value accepted as a word-list index */
#define MAX_HASH_VALUE 40
/*
29 keywords
37 is the maximum key range (MAX_HASH_VALUE - MIN_HASH_VALUE + 1)
*/
/*
 * Perfect-hash function for the reserved-word list (generated by gperf).
 *
 * str  points at the candidate word; it need not be NUL-terminated.
 * len  is the number of characters in the word and must be at least 1,
 *      because both str[0] and str[len - 1] are read.
 *
 * Returns len plus the table weights of the first and last characters.
 * Characters that never start or end a keyword weigh 40, which pushes the
 * result past the largest valid word-list slot (40).
 *
 * The characters are converted to unsigned char before indexing, and bytes
 * that lie outside the 128-entry table are given the same "not a keyword"
 * weight.  Indexing with a plain char read outside the table for any byte
 * with the high bit set.
 */
#ifdef __GNUC__
inline
#endif
static int
hash (str, len)
     register char *str;
     register int len;
{
  static unsigned char hash_table[] =
    {
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
     40, 40, 40, 40, 40, 40, 40, 40, 24,  9,
     10,  0, 15, 20,  6,  8, 40,  0,  0, 19,
      1, 16, 40, 40,  1,  0,  0, 12, 21,  4,
     40, 40, 40, 40, 40, 40, 40, 40,
    };
  /* weight of any byte the table has no entry for */
  int no_keyword = 40;
  unsigned char first = (unsigned char) str[0];
  unsigned char last = (unsigned char) str[len - 1];
  int first_weight = first < sizeof hash_table ? hash_table[first] : no_keyword;
  int last_weight = last < sizeof hash_table ? hash_table[last] : no_keyword;

  return len + last_weight + first_weight;
}
/*
 * Look up a candidate word in the built-in reserved-word list.
 *
 * str  points at the candidate; it need not be NUL-terminated, but its
 *      first len characters must not contain a NUL.
 * len  is the number of characters in the candidate.
 *
 * Returns a pointer to the matching entry of the static word list, or 0
 * when the candidate is not a reserved word.
 *
 * The comparison also requires the keyword to end after len characters.
 * strncmp() alone only proves that the candidate is a prefix of the keyword
 * in its hash slot, so the identifier "in" (hash value 11) used to be
 * reported as the keyword "int".
 *
 * The function is deliberately not declared `inline': a non-static inline
 * definition provides no external definition under C99 and later rules,
 * which would leave calls to it unresolved at link time.
 */
struct templ *
is_reserved (str, len)
     register char *str;
     register int len;
{
  /* Indexed by hash value; empty strings fill the unused slots. */
  static struct templ wordlist[] =
    {
      {"",}, {"",}, {"",}, {"",},
      {"else", rw_sp_nparen,},
      {"short", rw_decl,},
      {"struct", rw_struct_like,},
      {"extern", rw_decl,},
      {"return", rw_break,},
      {"while", rw_sp_paren,},
      {"register", rw_decl,},
      {"int", rw_decl,},
      {"switch", rw_switch,},
      {"case", rw_case,},
      {"char", rw_decl,},
      {"static", rw_decl,},
      {"double", rw_decl,},
      {"default", rw_case,},
      {"union", rw_struct_like,},
      {"for", rw_sp_paren,},
      {"float", rw_decl,},
      {"sizeof", rw_sizeof,},
      {"typedef", rw_decl,},
      {"enum", rw_struct_like,},
      {"long", rw_decl,},
      {"if", rw_sp_paren,},
      {"global", rw_decl,},
      {"va_dcl", rw_decl,},
      {"do", rw_sp_nparen,},
      {"break", rw_break,},
      {"unsigned", rw_decl,},
      {"",}, {"",}, {"",}, {"",},
      {"void", rw_decl,},
      {"",}, {"",}, {"",}, {"",},
      {"goto", rw_break,},
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
        {
          register char *s = wordlist[key].rwd;

          /*
           * s[len] is safe to read: once len characters of the candidate
           * (none of them NUL) have matched, the keyword holds at least len
           * characters followed by its terminator.
           */
          if (*s == *str
              && !strncmp (str + 1, s + 1, len - 1)
              && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
enum codes
lexi()
{
/* used to walk through the token */
char *tok;
int unary_delim; /* this is set to 1 if the current token
*
* forces a following operator to be unary */
static enum codes last_code; /* the last token type returned */
static int l_struct; /* set to 1 if the last token was 'struct' */
enum codes code; /* internal code to be returned */
char qchar; /* the delimiter character for a string */
unary_delim = false;
parser_state_tos->col_1 = parser_state_tos->last_nl; /* tell world that this token started in
* column 1 iff the last thing scanned was nl */
parser_state_tos->last_nl = false;
while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
parser_state_tos->col_1 = false; /* leading blanks imply token is not in column
* 1 */
if (++buf_ptr >= buf_end)
fill_buffer();
}
token = buf_ptr;
/* Scan an alphanumeric token */
if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
/*
* we have a character or number
*/
register char *j; /* used for searching thru list of
*
* reserved words */
register struct templ *p;
if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
int seendot = 0,
seenexp = 0;
if (*buf_ptr == '0' &&
(buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
buf_ptr += 2;
while (isxdigit(*buf_ptr))
buf_ptr++;
}
else
while (1) {
if (*buf_ptr == '.')
if (seendot)
break;
else
seendot++;
buf_ptr++;
if (!isdigit(*buf_ptr) && *buf_ptr != '.')
if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
break;
else {
seenexp++;
seendot++;
buf_ptr++;
if (*buf_ptr == '+' || *buf_ptr == '-')
buf_ptr++;
}
}
if (*buf_ptr == 'L' || *buf_ptr == 'l')
buf_ptr++;
}
else
while (chartype[*buf_ptr] == alphanum) { /* copy it over */
buf_ptr++;
if (buf_ptr >= buf_end)
fill_buffer();
}
token_end = buf_ptr;
while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
if (++buf_ptr >= buf_end)
fill_buffer();
}
parser_state_tos->its_a_keyword = false;
parser_state_tos->sizeof_keyword = false;
if (l_struct) { /* if last token was 'struct', then this token
* should be treated as a declaration */
l_struct = false;
last_code = ident;
parser_state_tos->last_u_d = true;
return (decl);
}
parser_state_tos->last_u_d = false; /* Operator after indentifier is binary */
last_code = ident; /* Remember that this is the code we will
* return */
/* Check whether the token is a reserved word. Use perfect hashing... */
p = is_reserved (token, token_end - token);
if (p) { /* we have a keyword */
found_keyword:
parser_state_tos->its_a_keyword = true;
parser_state_tos->last_u_d = true;
switch (p->rwcode) {
case rw_switch: /* it is a switch */
return (swstmt);
case rw_case: /* a case or default */
return (casestmt);
case rw_struct_like: /* a "struct" */
if (parser_state_tos->p_l_follow)
break; /* inside parens: cast */
l_struct = true;
/*
* Next time around, we will want to know that we have had a
* 'struct'
*/
case rw_decl: /* one of the declaration keywords */
if (parser_state_tos->p_l_follow) {
parser_state_tos->cast_mask |= 1 << parser_state_tos->p_l_follow;
break; /* inside parens: cast */
}
last_code = decl;
return (decl);
case rw_sp_paren: /* if, while, for */
return (sp_paren);
case rw_sp_nparen: /* do, else */
return (sp_nparen);
case rw_sizeof:
parser_state_tos->sizeof_keyword = true;
default: /* all others are treated like any other
* identifier */
return (ident);
} /* end of switch */
} /* end of if (found_it) */
if (*buf_ptr == '(' && parser_state_tos->tos <= 1 && parser_state_tos->ind_level == 0) {
register char *tp = buf_ptr;
while (tp < buf_end)
if (*tp++ == ')' && (*tp == ';' || *tp == ','))
goto not_proc;
parser_state_tos->procname = token;
parser_state_tos->procname_end = token_end;
parser_state_tos->in_parameter_declaration = 1;
not_proc:;
}
/*
* The following hack attempts to guess whether or not the current
* token is in fact a declaration keyword -- one that has been
* typedefd
*/
if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
&& !parser_state_tos->p_l_follow
&& !parser_state_tos->block_init
&& (parser_state_tos->last_token == rparen || parser_state_tos->last_token == semicolon ||
parser_state_tos->last_token == decl ||
parser_state_tos->last_token == lbrace || parser_state_tos->last_token == rbrace)) {
parser_state_tos->its_a_keyword = true;
parser_state_tos->last_u_d = true;
last_code = decl;
return decl;
}
if (last_code == decl) /* if this is a declared variable, then
* following sign is unary */
parser_state_tos->last_u_d = true; /* will make "int a -1" work */
last_code = ident;
return (ident); /* the ident is not in the list */
} /* end of procesing for alpanum character */
/* Scan a non-alphanumeric token */
/* If it is not a one character token, token_end will get changed
later. */
token_end = buf_ptr + 1;
if (++buf_ptr >= buf_end)
fill_buffer();
switch (*token) {
case '\n':
unary_delim = parser_state_tos->last_u_d;
parser_state_tos->last_nl = true; /* remember that we just had a newline */
code = (had_eof ? code_eof : newline);
/*
* if data has been exausted, the newline is a dummy, and we should
* return code to stop
*/
break;
case '\'': /* start of quoted character */
case '"': /* start of string */
qchar = *token;
/* Find out how big the literal is so we can set token_end. */
/* Invariant: before loop test buf_ptr points to the next */
/* character that we have not yet checked. */
while (*buf_ptr != qchar && *buf_ptr != 0 && *buf_ptr != '\n')
{
if (*buf_ptr == '\\')
{
buf_ptr++;
if (buf_ptr >= buf_end)
fill_buffer ();
if (*buf_ptr == '\n')
++line_no;
if (*buf_ptr == 0)
break;
}
buf_ptr++;
if (buf_ptr >= buf_end)
fill_buffer ();
}
if (*buf_ptr == '\n' || *buf_ptr == 0)
{
diag (1,
qchar == '\''
? "Unterminated character constant"
: "Unterminated string constant"
);
}
else
{
/* Advance over end quote char. */
buf_ptr++;
if (buf_ptr >= buf_end)
fill_buffer ();
}
code = ident;
break;
case ('('):
case ('['):
unary_delim = true;
code = lparen;
break;
case (')'):
case (']'):
code = rparen;
break;
case '#':
unary_delim = parser_state_tos->last_u_d;
code = preesc;
break;
case '?':
unary_delim = true;
code = question;
break;
case (':'):
code = colon;
unary_delim = true;
break;
case (';'):
unary_delim = true;
code = semicolon;
break;
case ('{'):
unary_delim = true;
/* This check is made in the code for '='. No one who writes
initializers without '=' these days deserves to have indent
work on their code (besides which, uncommenting this would
screw up anything which assumes that parser_state_tos->block_init really
means you are in an initializer. */
/*
* if (parser_state_tos->in_or_st) parser_state_tos->block_init = 1;
*/
/* The following neat hack causes the braces in structure
initializations to be treated as parentheses, thus causing
initializations to line up correctly, e.g.
struct foo bar =
{{a,
b,
c},
{1,
2}};
If lparen is returned, token can be used to distinguish
between '{' and '(' where necessary. */
code = parser_state_tos->block_init ? lparen : lbrace;
break;
case ('}'):
unary_delim = true;
/* The following neat hack is explained under '{' above. */
code = parser_state_tos->block_init ? rparen : rbrace;
break;
case 014: /* a form feed */
unary_delim = parser_state_tos->last_u_d;
parser_state_tos->last_nl = true; /* remember this so we can set 'parser_state_tos->col_1'
* right */
code = form_feed;
break;
case (','):
unary_delim = true;
code = comma;
break;
case '.':
unary_delim = false;
code = period;
break;
case '-':
case '+': /* check for -, +, --, ++ */
code = (parser_state_tos->last_u_d ? unary_op : binary_op);
unary_delim = true;
if (*buf_ptr == token[0]) {
/* check for doubled character */
buf_ptr++;
/* buffer overflow will be checked at end of loop */
if (last_code == ident || last_code == rparen) {
code = (parser_state_tos->last_u_d ? unary_op : postop);
/* check for following ++ or -- */
unary_delim = false;
}
}
else if (*buf_ptr == '=')
/* check for operator += */
buf_ptr++;
else if (*buf_ptr == '>') {
/* check for operator -> */
buf_ptr++;
if (!pointer_as_binop) {
unary_delim = false;
code = unary_op;
parser_state_tos->want_blank = false;
}
}
break; /* buffer overflow will be checked at end of
* switch */
case '=':
if (parser_state_tos->in_or_st)
parser_state_tos->block_init = 1;
if (*buf_ptr == '=') /* == */
buf_ptr++;
code = binary_op;
unary_delim = true;
break;
/* can drop thru!!! */
case '>':
case '<':
case '!': /* ops like <, <<, <=, !=, etc */
if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
if (++buf_ptr >= buf_end)
fill_buffer();
}
code = (parser_state_tos->last_u_d ? unary_op : binary_op);
unary_delim = true;
break;
default:
if (token[0] == '/' && *buf_ptr == '*') {
/* it is start of comment */
if (++buf_ptr >= buf_end)
fill_buffer();
code = comment;
unary_delim = parser_state_tos->last_u_d;
break;
}
while (*(buf_ptr - 1) == *buf_ptr || *buf_ptr == '=') {
/*
* handle ||, &&, etc, and also things as in int *****i
*/
if (++buf_ptr >= buf_end)
fill_buffer();
}
code = (parser_state_tos->last_u_d ? unary_op : binary_op);
unary_delim = true;
} /* end of switch */
if (code != newline) {
l_struct = false;
last_code = code;
}
token_end = buf_ptr;
if (buf_ptr >= buf_end) /* check for input buffer empty */
fill_buffer();
parser_state_tos->last_u_d = unary_delim;
return (code);
}
/*
 * Add the given keyword to the user keyword table (user_specials), using
 * val as the keyword type.
 *
 * key  NUL-terminated keyword text.  The pointer itself is stored, not a
 *      copy, so the string must stay valid for as long as the table is used.
 * val  category recorded for the keyword.
 *
 * A key that is already a built-in reserved word is ignored.  The entry
 * after the last keyword is always a terminator (rwd == 0, rw_none).
 *
 * Always returns 0.  The function was originally declared with an implicit
 * int return type and no return value; the type is now spelled out so that
 * the definition is valid in current C.
 */
int
addkey(key, val)
    char *key;
    enum rwcodes val;
{
    register struct templ *p;

    /*
     * Check to see whether key is a reserved word or not.  is_reserved() is
     * defined without a prototype and expects an int length.
     */
    if (is_reserved (key, (int) strlen (key)) != 0)
	return 0;

    if (user_specials == 0) {
	/* First registration: allocate the initial table. */
	user_specials = (struct templ *) xmalloc (5 * sizeof (struct templ));
	if (user_specials == 0) {
	    fputs ("indent: out of memory\n", stderr);
	    exit (1);
	}
	user_specials_max = 5;
	user_specials_idx = 0;
    }
    else if (user_specials_idx + 1 >= user_specials_max) {
	/*
	 * Every insertion writes two slots: the new entry and the terminator
	 * after it.  Grow while there is room for fewer than two.  Growing
	 * only once the table was completely full let the terminator of the
	 * last fitting entry land one element past the allocation.
	 */
	user_specials_max += 5;
	user_specials = (struct templ *) xrealloc ((char *) user_specials,
						   user_specials_max
						   * sizeof (struct templ));
    }

    p = &user_specials[user_specials_idx++];
    p->rwd = key;
    p->rwcode = val;
    /* terminator following the last keyword */
    p[1].rwd = 0;
    p[1].rwcode = rw_none;
    return 0;
}